{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Задание по теме «Коллаборативная фильтрация»\n",
    "\n",
    "ПАКЕТ SURPRISE\n",
    "\n",
    "- используйте данные MovieLens 1M\n",
    "- можно использовать любые модели из пакета\n",
    "- получите RMSE на тестовом сете 0.87 и ниже"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from surprise import Reader, Dataset, SVD\n",
    "from surprise.model_selection import GridSearchCV, cross_validate\n",
    "import pandas as pd\n",
    "from sklearn.metrics.pairwise import cosine_similarity, cosine_distances\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "import re\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Создадим датафрейм с форматом пригодным для библиотеки Surprise. Для первичного подбора параметров загрузим датасет на 100К."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>title</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Heat (1995)</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId                    title  rating\n",
       "0       1         Toy Story (1995)     4.0\n",
       "1       1  Grumpier Old Men (1995)     4.0\n",
       "2       1              Heat (1995)     4.0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_movies = pd.read_csv('movielens_100k\\\\movies.csv')\n",
    "df_ratings = pd.read_csv('movielens_100k\\\\ratings.csv')\n",
    "df_ratings = df_ratings.merge(df_movies.loc[:, ['movieId', 'title']], \n",
    "                              on='movieId', how='left').loc[:, ['userId', 'title', 'rating']]\n",
    "df_ratings[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Найдем минимальную и максимальную оценки для калибровки объекта класса Reader."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Минимальный рейтинг: 0.5\n",
      "Максимальный рейтинг: 5.0\n"
     ]
    }
   ],
   "source": [
    "rating_min, rating_max = df_ratings.rating.min(), df_ratings.rating.max()\n",
    "print(\"Минимальный рейтинг: {}\\nМаксимальный рейтинг: {}\".format(rating_min, rating_max))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Создадим датасет в формате библиотеки Surprise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = Reader(rating_scale=(rating_min, rating_max))\n",
    "data = Dataset.load_from_df(df_ratings, reader)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Попробуем побучить модель SVD и предсказать оценку пользователя к каждому фильму"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating RMSE, MAE of algorithm SVD on 5 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     \n",
      "RMSE (testset)    0.8832  0.8667  0.8724  0.8720  0.8751  0.8739  0.0054  \n",
      "MAE (testset)     0.6794  0.6653  0.6707  0.6691  0.6723  0.6713  0.0046  \n",
      "Fit time          7.76    7.57    10.01   8.13    6.30    7.95    1.20    \n",
      "Test time         0.24    0.21    0.29    0.21    0.21    0.23    0.03    \n"
     ]
    }
   ],
   "source": [
    "model_svd = SVD()\n",
    "_ = cross_validate(model_svd, data, cv=5, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8623182447822622\n",
      "{'n_factors': 200, 'n_epochs': 40, 'lr_all': 0.05}\n"
     ]
    }
   ],
   "source": [
    "\"\"\"\n",
    "!!! Блок выполняется около 1-2 часов\n",
    "\"\"\"\n",
    "param_grid = {\n",
    "    'n_factors': [10, 25, 50, 75, 100, 150, 200],\n",
    "    'n_epochs': [20, 40, 60],\n",
    "    'lr_all': [0.005, 0.01, 0.05]\n",
    "}\n",
    "\n",
    "gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=5)\n",
    "gs.fit(data)\n",
    "\n",
    "# best RMSE score\n",
    "print(gs.best_score['rmse'])\n",
    "\n",
    "# combination of parameters that gave the best RMSE score\n",
    "print(gs.best_params['rmse'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Поиск наилучших параметров с помощью решетчатого поиска дал следующие результаты:\n",
    "\n",
    "- Количество факторов 200\n",
    "- Количество эпох 40\n",
    "- Скорость обучения для всех параметров SVD 0.05\n",
    "\n",
    "Данные параметры дают RMSE 0.8623 на датасете 100K.\n",
    "\n",
    "Попробуем обучить модель с этими параметрами на датасете 1М."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>title</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>One Flew Over the Cuckoo's Nest (1975)</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>James and the Giant Peach (1996)</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>My Fair Lady (1964)</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId                                   title  rating\n",
       "0       1  One Flew Over the Cuckoo's Nest (1975)       5\n",
       "1       1        James and the Giant Peach (1996)       3\n",
       "2       1                     My Fair Lady (1964)       3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_movies = pd.read_csv('movielens_1m\\\\movies.dat', delimiter=\"::\", \n",
    "                        names=['movieId', 'title', 'genres'], engine='python')\n",
    "df_ratings = pd.read_csv('movielens_1m\\\\ratings.dat', delimiter=\"::\", \n",
    "                         names=['userId', 'movieId', 'rating', 'timestamp'], engine='python')\n",
    "df_ratings = df_ratings.merge(df_movies.loc[:, ['movieId', 'title']], \n",
    "                              on='movieId', how='left').loc[:, ['userId', 'title', 'rating']]\n",
    "df_ratings[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating RMSE, MAE of algorithm SVD on 5 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     \n",
      "RMSE (testset)    0.8576  0.8589  0.8693  0.8638  0.8645  0.8628  0.0042  \n",
      "MAE (testset)     0.6570  0.6556  0.6642  0.6632  0.6623  0.6605  0.0035  \n",
      "Fit time          27.79   20.70   20.11   19.98   19.87   21.69   3.06    \n",
      "Test time         0.15    0.16    0.16    0.23    0.16    0.17    0.03    \n"
     ]
    }
   ],
   "source": [
    "model_svd = SVD(n_factors=200, n_epochs=40, lr_all=0.05)\n",
    "crosvall_result = cross_validate(model_svd, data, cv=5, verbose=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Построим простую рекомендательную систему, которая найдет ТОП-10 похожих фильмов с самой высокой оценкой для пользователя с идентификатором 414."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def genre_prepare(genre):\n",
    "    genre = ' '.join(genre.replace('-', '').replace(' ', '').lower().split('|'))\n",
    "    return genre\n",
    "\n",
    "def get_user_rating(user_id, title):\n",
    "    \"\"\" Поиск оценки пользователя для фильма в датасете MovieLens\n",
    "    \n",
    "    Параметры\n",
    "    ---------\n",
    "    user_id : int\n",
    "      Идентификатор пользователя\n",
    "    title : str\n",
    "      Название фильма\n",
    "    \"\"\"\n",
    "    user_id = int(user_id)\n",
    "    user_rating = list(df_ratings.loc[(df_ratings.userId == user_id) &\n",
    "                                      (df_ratings.title == title), 'rating'].values)\n",
    "    user_rating = user_rating[0] if user_rating else None\n",
    "    return user_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>genres_mod</th>\n",
       "      <th>user_rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Animation|Children's|Comedy</td>\n",
       "      <td>animation children's comedy</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children's|Fantasy</td>\n",
       "      <td>adventure children's fantasy</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "      <td>comedy romance</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>comedy drama</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>comedy</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movieId                               title                        genres  \\\n",
       "0        1                    Toy Story (1995)   Animation|Children's|Comedy   \n",
       "1        2                      Jumanji (1995)  Adventure|Children's|Fantasy   \n",
       "2        3             Grumpier Old Men (1995)                Comedy|Romance   \n",
       "3        4            Waiting to Exhale (1995)                  Comedy|Drama   \n",
       "4        5  Father of the Bride Part II (1995)                        Comedy   \n",
       "\n",
       "                     genres_mod  user_rating  \n",
       "0   animation children's comedy          NaN  \n",
       "1  adventure children's fantasy          NaN  \n",
       "2                comedy romance          NaN  \n",
       "3                  comedy drama          NaN  \n",
       "4                        comedy          NaN  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_movies['genres_mod'] = df_movies['genres'].apply(genre_prepare)\n",
    "df_movies['user_rating'] = df_movies.apply(lambda x: get_user_rating(user_id=414, title=x.title), axis=1)\n",
    "df_movies[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Создадим датафрейм с фильмами, у которых нет оценки пользователя. То есть эти фильмы предположительно пользователь не смотрел. У нас нет информации о контакте пользователя с контентом без выставления ему оценки, поэтому делаем такое упрощенное предположение."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>genres_mod</th>\n",
       "      <th>user_rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Animation|Children's|Comedy</td>\n",
       "      <td>animation children's comedy</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children's|Fantasy</td>\n",
       "      <td>adventure children's fantasy</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "      <td>comedy romance</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>comedy drama</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>comedy</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movieId                               title                        genres  \\\n",
       "0        1                    Toy Story (1995)   Animation|Children's|Comedy   \n",
       "1        2                      Jumanji (1995)  Adventure|Children's|Fantasy   \n",
       "2        3             Grumpier Old Men (1995)                Comedy|Romance   \n",
       "3        4            Waiting to Exhale (1995)                  Comedy|Drama   \n",
       "4        5  Father of the Bride Part II (1995)                        Comedy   \n",
       "\n",
       "                     genres_mod  user_rating  \n",
       "0   animation children's comedy          NaN  \n",
       "1  adventure children's fantasy          NaN  \n",
       "2                comedy romance          NaN  \n",
       "3                  comedy drama          NaN  \n",
       "4                        comedy          NaN  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_no_watched_user_movies = df_movies[df_movies.user_rating.isna()]\n",
    "df_no_watched_user_movies[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Создаем матрицу TF-IDF для жанров непросмотренных фильмов "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "tfidf = TfidfVectorizer()\n",
    "no_watched_user_movies_tfidf = tfidf.fit_transform(df_no_watched_user_movies['genres_mod']).todense()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Предположим, что пользователь посмотрел фильм \"Requiem for a Dream (2000)\" и требуется предложить ТОП-10 похожих фильмов с максимальной предсказанной пользователськой оценкой. И эти фильмы не должны быть просмотрены пользователем."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>genres_mod</th>\n",
       "      <th>user_rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>315</th>\n",
       "      <td>318</td>\n",
       "      <td>Shawshank Redemption, The (1994)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.249196</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1028</th>\n",
       "      <td>1041</td>\n",
       "      <td>Secrets &amp; Lies (1996)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.247093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3399</th>\n",
       "      <td>3468</td>\n",
       "      <td>Hustler, The (1961)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.224298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3132</th>\n",
       "      <td>3201</td>\n",
       "      <td>Five Easy Pieces (1970)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.188433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3278</th>\n",
       "      <td>3347</td>\n",
       "      <td>Never Cry Wolf (1983)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.150602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2291</th>\n",
       "      <td>2360</td>\n",
       "      <td>Celebration, The (Festen) (1998)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.133238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>942</th>\n",
       "      <td>954</td>\n",
       "      <td>Mr. Smith Goes to Washington (1939)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.131595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2890</th>\n",
       "      <td>2959</td>\n",
       "      <td>Fight Club (1999)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.098818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2260</th>\n",
       "      <td>2329</td>\n",
       "      <td>American History X (1998)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.090155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3155</th>\n",
       "      <td>3224</td>\n",
       "      <td>Woman in the Dunes (Suna no onna) (1964)</td>\n",
       "      <td>Drama</td>\n",
       "      <td>drama</td>\n",
       "      <td>4.080022</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      movieId                                     title genres genres_mod  \\\n",
       "315       318          Shawshank Redemption, The (1994)  Drama      drama   \n",
       "1028     1041                     Secrets & Lies (1996)  Drama      drama   \n",
       "3399     3468                       Hustler, The (1961)  Drama      drama   \n",
       "3132     3201                   Five Easy Pieces (1970)  Drama      drama   \n",
       "3278     3347                     Never Cry Wolf (1983)  Drama      drama   \n",
       "2291     2360          Celebration, The (Festen) (1998)  Drama      drama   \n",
       "942       954       Mr. Smith Goes to Washington (1939)  Drama      drama   \n",
       "2890     2959                         Fight Club (1999)  Drama      drama   \n",
       "2260     2329                 American History X (1998)  Drama      drama   \n",
       "3155     3224  Woman in the Dunes (Suna no onna) (1964)  Drama      drama   \n",
       "\n",
       "      user_rating  \n",
       "315      4.249196  \n",
       "1028     4.247093  \n",
       "3399     4.224298  \n",
       "3132     4.188433  \n",
       "3278     4.150602  \n",
       "2291     4.133238  \n",
       "942      4.131595  \n",
       "2890     4.098818  \n",
       "2260     4.090155  \n",
       "3155     4.080022  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Находим жанр просмотренного фильма\n",
    "watсhed_film_genre = df_movies.loc[df_movies.title == \"Requiem for a Dream (2000)\", 'genres_mod']\n",
    "# Находим матрицу TF-IDF для просмотренного фильма \n",
    "watсhed_film_genre_tfidf = tfidf.transform(watсhed_film_genre).todense()\n",
    "\n",
    "# Расчитаем косинусное сходство между просмотренным фильмом и остальными непросмотренными\n",
    "cosine_result = cosine_similarity(no_watched_user_movies_tfidf, watсhed_film_genre_tfidf).flatten()\n",
    "\n",
    "# Находим 500 индексов фильмов, которые наибольшим образом похожи на просмотренный фильм\n",
    "top_film_index = np.argsort(cosine_result)[::-1][:500]\n",
    "\n",
    "# Для 500 наиболее похожих фильмов находим рейтинг пользователя и выдаем ТОП-10 с наибольшей оценкой пользователя\n",
    "df_user_movies_recommend = df_no_watched_user_movies.iloc[top_film_index, :].copy()\n",
    "df_user_movies_recommend['user_rating'] = df_user_movies_recommend['title'].apply(lambda x: model_svd.predict(uid='414', iid=x).est)\n",
    "df_user_movies_recommend.sort_values(by='user_rating', ascending=False)[:10]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
